home *** CD-ROM | disk | FTP | other *** search
- #-*-perl-*-
- # Parse "ls -lR" type listings
- # use lsparse'reset( dirname ) repeately
- # By Lee McLoughlin <lmjm@doc.ic.ac.uk>
- #
- # $Id: lsparse.pl,v 2.1 1993/06/28 15:03:08 lmjm Exp lmjm $
- # $Log: lsparse.pl,v $
- # Revision 2.1 1993/06/28 15:03:08 lmjm
- # Full 2.1 release
- #
- #
-
- # This has better be available via your PERLLIB environment variable
- require 'dateconv.pl';
-
- package lsparse;
-
- # The current directory is stripped off the
- # start of the returned pathname
- # $match is a pattern that matches this
- local( $match );
-
- # The filestore type being scanned
- $lsparse'fstype = 'unix';
-
- # Keep whatever case is on the remote system. Otherwise lowercase it.
- $lsparse'vms_keep_case = '';
-
- # A name to report when errors occur
- $lsparse'name = 'unknown';
-
- # Name of routine to call to parse incoming listing lines
- $ls_line = '';
-
- # Set the directory that is being scanned and
- # check that the scan routing for this fstype exists
- # returns false if the fstype is unknown.
- sub lsparse'reset
- {
- $here = $currdir = @_[0];
- $now = time;
- # Vms tends to give FULL pathnames reguardless of where
- # you generate the dir listing from.
- $vms_strip = $currdir;
- $vms_strip =~ s,^/+,,;
- $vms_strip =~ s,/+$,,;
-
- $ls_line = "lsparse'line_$fstype";
- return( defined( &$ls_line ) );
- }
-
- # See line_unix following routine for call/return details.
- # This calls the filestore specific parser.
- sub lsparse'line
- {
- # ls_line is setup in lsparse'reset to the name of the function
- local( $path, $size, $time, $type, $mode ) =
- eval "&$ls_line( @_ )";
-
- # Zap any leading ./ (Somehow they still creep thru.)
- $path =~ s:^(\./)+::;
- return ($path, $size, $time, $type, $mode);
- }
-
- # --------------------- parse standard Unix ls output
- # for each file or directory line found return a tuple of
- # (pathname, size, time, type, mode)
- # pathname is a full pathname relative to the directory set by reset()
- # size is the size in bytes (this is always 0 for directories)
- # time is a Un*x time value for the file
- # type is "f" for a file, "d" for a directory and
- # "l linkname" for a symlink
- sub lsparse'line_unix
- {
- local( $fh ) = @_;
- local( $non_crud, $perm_denied );
-
- if( eof( $fh ) ){
- return( "", 0, 0, 0 );
- }
-
- while( <$fh> ){
- # Stomp on carriage returns
- s/\015//g;
-
- # I'm about to look at this at lot
- study;
-
- # Try and spot crud in the line and avoid it
- # You can get:
- # -rw-r--r-ls: navn/internett/RCS/nsc,v: Permission denied
- # ls: navn/internett/RCS/bih,v: Permission denied
- # - 1 43 daemon 1350 Oct 28 14:03 sognhs
- # -rwcannot access .stuff/incoming
- # cannot access .stuff/.cshrc
- if( m%^(.*)/bin/ls:.*Permission denied% ||
- m%^(.*)ls:.*Permission denied% ||
- m%^(.*)(cannot|can not) access % ){
- if( ! $non_crud ){
- $non_crud = $1;
- }
- next;
- }
- # Also try and spot non ls "Permission denied" messages. These
- # are a LOT harder to handle as the key part is at the end
- # of the message. For now just zap any line containing it
- # and the first line following (as it will PROBABLY have been broken).
- #
- if( /.:\s*Permission denied/ ){
- $perm_denied = 1;
- next;
- }
- if( $perm_denied ){
- $perm_denied = "";
- warn "Warning: input corrupted by 'Permission denied'",
- "errors, about line $. of $lsparse'name\n";
- next;
- }
- # Not found's are like Permission denied's. They can start part
- # way through a line but with no way of spotting where they begin
- if( /not found/ ){
- $not_found = 1;
- next;
- }
- if( $not_found ){
- $not_found = "";
- warn "Warning: input corrupted by 'not found' errors",
- " about line $. of $lsparse'name\n";
- next;
- }
-
- if( $non_crud ){
- $_ = $non_crud . $_;
- $non_crud = "";
- }
-
- if( /^([\-lrwxsSt]{10}).*\s(\d+)\s*(\w\w\w\s+\d+\s*(\d+:\d+|\d\d\d\d))\s+(.*)\n/ ){
- local( $kind, $size, $lsdate, $file ) = ($1, $2, $3, $5);
-
- if( $file eq '.' || $file eq '..' ){
- next;
- }
-
- local( $time ) = &main'lstime_to_time( $lsdate );
- local( $type ) = '?';
- local( $mode ) = 0;
-
- # This should be a symlink
- if( $kind =~ /^l/ && $file =~ /(.*) -> (.*)/ ){
- $file = $1;
- $type = "l $2";
- }
- elsif( $kind =~ /^-/ ){
- # (hopefully) a regular file
- $type = 'f';
- }
-
- $mode = &chars_to_mode( $kind );
-
- $currdir =~ s,/+,/,g;
- $file =~ s,^/$match,,;
- $file = "/$currdir/$file";
- $file =~ s,/+,/,g;
- return( substr( $file, 1 ), $size, $time, $type, $mode );
- }
- # Match starts of directories. Try not to match
- # directories whose naes ending in :
- elsif( /^([\.\/]*.*):$/ && ! /^[dcbsp].*\s.*\s.*:$/ ){
- if( $1 eq '.' ){
- next;
- }
- elsif( $1 !~ /^\// ){
- $currdir = "$here/$1";
- }
- else {
- $currdir = "$1";
- }
- $currdir =~ s,/+,/,g;
- $match = $currdir;
- $match =~ s/([\+\(\)\[\]\*\?])/\\$1/g;
- return( substr( $currdir, 1 ), 0, 0, 'd', 0 );
- }
- elsif( /^[dcbsp].*[^:]$/ || /^\s*$/ || /^[Tt]otal.*/ || /[Uu]nreadable$/ ){
- ;
- }
- elsif( /^.*[Uu]pdated.*:/ ){
- # Probably some line like:
- # Last Updated: Tue Oct 8 04:30:50 EDT 1991
- # skip it
- next;
- }
- elsif( /^([\.\/]*[^\s]*)/ ){
- # Just for the export.lcs.mit.edu ls listing
- $match = $currdir = "$1/";
- $match =~ s/[\+\(\[\*\?]/\\$1/g;
- }
- else {
- printf( "Unmatched line: %s", $_ );
- }
- }
- return( '', 0, 0, 0, 0 );
- }
-
- # Convert the mode chars at the start of an ls-l entry into a number
- sub chars_to_mode
- {
- local( $chars ) = @_;
- local( @kind, $c );
-
- # Split and remove first char
- @kind = split( //, $kind );
- shift( @kind );
-
- foreach $c ( @kind ){
- $mode <<= 1;
- if( $c ne '-' && $c ne 'S' && $c ne 'T' ){
- $mode |= 1;
- }
- }
-
- # check for "special" bits
-
- # uid bit
- if( /^...s....../i ){
- $mode |= 04000;
- }
-
- # gid bit
- if( /^......s.../i ){
- $mode |= 02000;
- }
-
- # sticky bit
- if( /^.........t/i ){
- $mode |= 01000;
- }
-
- return $mode;
- }
-
- # --------------------- parse dls output
-
- # dls is a descriptive ls that some sites use.
- # this parses the output of dls -dtR
-
- # for each file or directory line found return a tuple of
- # (pathname, size, time, type, mode)
- # pathname is a full pathname relative to the directory set by reset()
- # size is the size in bytes (this is always 0 for directories)
- # time is a Un*x time value for the file
- # type is "f" for a file, "d" for a directory and
- # "l linkname" for a symlink
- sub lsparse'line_dls
- {
- local( $fh ) = @_;
- local( $non_crud, $perm_denied );
-
- if( eof( $fh ) ){
- return( "", 0, 0, 0 );
- }
-
- while( <$fh> ){
- # Stomp on carriage returns
- s/\015//g;
-
- # I'm about to look at this at lot
- study;
-
- if( /^(\S*)\s+(\-|\=|\d+)\s+((\w\w\w\s+\d+|\d+\s+\w\w\w)\s+(\d+:\d+|\d\d\d\d))\s+(.+)\n/ ){
- local( $file, $size, $lsdate, $description ) =
- ($1, $2, $3, $6);
- $file =~ s/\s+$//;
- local( $time, $type, $mode );
-
- if( $file =~ m|/$| ){
- # a directory
- $file =~ s,/$,,;
- $time = 0;
- $type = 'd';
- $mode = 0555;
- }
- else {
- # a file
- $time = &main'lstime_to_time( $lsdate );
- $type = 'f';
- $mode = 0444;
- }
-
- # Handle wrapped long filenames
- if( $filename ne '' ){
- $file = $filename;
- }
- $filename = '';
-
- $file =~ s/\s*$//;
- $file = "$currdir/$file";
- $file =~ s,/+,/,g;
- return( substr( $file, 1 ), $size, $time, $type, $mode );
- }
- elsif( /^(.*):$/ ){
- if( $1 eq '.' ){
- next;
- }
- elsif( $1 !~ /^\// ){
- $currdir = "$here/$1/";
- }
- else {
- $currdir = "$1/";
- }
- $filename = '';
- $currdir =~ s,/+,/,g;
- $match = $currdir;
- $match =~ s/([\+\(\)\[\]\*\?])/\\$1/g;
- return( substr( $currdir, 1 ), 0, 0, 'd', 0 );
- }
- else {
- # If a filename is long then it is on a line by itself
- # with the details on the next line
- chop( $filename = $_ );
- }
- }
- return( '', 0, 0, 0, 0 );
- }
-
- # --------------------- parse netware output
-
- # For each file or directory line found return a tuple of
- # (pathname, size, time, type, mode)
- # pathname is a full pathname relative to the directory set by reset()
- # size is the size in bytes (this is always 0 for directories)
- # time is a Un*x time value for the file
- # type is "f" for a file, "d" for a directory and
- # "l linkname" for a symlink
- sub lsparse'line_netware
- {
- local( $fh ) = @_;
-
- if( eof( $fh ) ){
- return( "", 0, 0, 0 );
- }
-
- while( <$fh> ){
- # Stomp on carriage returns
- s/\015//g;
- # Unix vs NetWare:
- #1234567890 __________.*_____________ d+ www dd dddd (.*)\n
- #drwxr-xr-x 2 jrd other 512 Feb 29 1992 vt100
- # kind size lsdate file
- #123456789012sw+ ____.*_______\s+(\d+) \s+ wwwsddsdd:dd\s+ (.*)\n
- #- [R----F--] jrd 197928 Sep 25 15:19 kermit.exe
- #d [R----F--] jrd 512 Oct 06 09:31 source
- #d [RWCEAFMS] jrd 512 Sep 04 14:38 lwp
-
- if( /^([d|l|\-]\s\[[RWCEAFMS\-]{8}\])\s\w+\s+(\d+)\s*(\w\w\w\s+\d+\s*(\d+:\d+|\d\d\d\d))\s+(.*)\n/) {
- local( $kind, $size, $lsdate, $file ) =
- ( $1, $2, $3, $5);
- if( $file eq '.' || $file eq '..' ){
- next;
- }
- local( $time ) = &main'lstime_to_time( $lsdate );
- local( $type ) = '?';
- local( $mode ) = 0;
-
- # This should be a symlink
- if( $kind =~ /^l/ && $file =~ /(.*) -> (.*)/ ){
- $file = $1;
- $type = "l $2";
- }
- elsif( $kind =~ /^-/ ){
- # (hopefully) a regular file
- $type = 'f';
- }
-
- $mode = &netware_to_mode( $kind );
-
- if( $kind =~ /^d/ ) {
- # a directory
- $type = 'd';
- $size = 0; # Don't believe the report size
- }
- $currdir =~ s,/+,/,g;
- $file =~ s,^/$match,,;
- $file = "/$currdir/$file";
- $file =~ s,/+,/,g;
- return( substr( $file, 1 ), $size, $time, $type, $mode );
- }
-
- elsif( /^[dcbsp].*[^:]$/ || /^\s*$/ || /^[Tt]otal.*/ || /[Uu]nreadable$/ ){
- ;
- }
- elsif( /^.*[Uu]pdated.*:/ ){
- # Probably some line like:
- # Last Updated: Tue Oct 8 04:30:50 EDT 1991
- # skip it
- next;
- }
- else {
- printf( "Unmatched line: %s", $_ );
- return( '', 0, 0, 0, 0 );
- }
- }
- return( '', 0, 0, 0, 0 );
- }
-
- # Convert NetWare file access mode chars at the start of a DIR entry
- # into a Unix access number.
- sub netware_to_mode
- {
- local( $chars ) = @_;
- local( @kind, $c );
-
- # Split and remove first three characters
- @kind = split( //, $kind );
- shift( @kind ); # omit directory "d" field
- shift( @kind ); # omit space separator
- shift( @kind ); # omit left square bracket
- $mode = 0; # init $mode to no access
-
- foreach $c ( @kind ){
- if( $c eq 'R' ) {$mode |= 0x644;} ## r/w r r
- if( $c eq 'W' ) {$mode |= 0x222;} ## w w w
- if( $c eq 'F' ) {$mode |= 0x444;} ## r r r
- }
- return $mode;
- }
- # --------------------- parse VMS dir output
- # for each file or directory line found return a tuple of
- # (pathname, size, time, type, mode)
- # pathname is a full pathname relative to the directory set by reset()
- # size is the size in bytes (this is always 0 for directories)
- # time is a Un*x time value for the file
- # type is "f" for a file, "d" for a directory and
- # "l linkname" for a symlink
- sub lsparse'line_vms
- {
- local( $fh ) = @_;
- local( $non_crud, $perm_denied );
-
- if( eof( $fh ) ){
- return( "", 0, 0, 0 );
- }
-
- while( <$fh> ){
- # Stomp on carriage returns
- s/\015//g;
-
- # I'm about to look at this at lot
- study;
-
- if( /^\s*$/ ){
- next;
- }
-
- if( /^\s*Total of/i ){
- # Just a size report ignore
- next;
- }
-
- if( /\%RMS-E-PRV|insufficient privilege/i ){
- # A permissions error - skip the line
- next;
- }
-
- # Upper case is so ugly
- if( ! $lsparse'vms_keep_case ){
- tr/A-Z/a-z/;
- }
-
- # DISK$ANON:[ANONYMOUS.UNIX]
- if( /^([^:]+):\[([^\]+]+)\]\s*$/ ){
- # The directory name
- # Use the Unix convention of /'s in filenames not
- # .'s
- $currdir = '/' . $2;
- $currdir =~ s,\.,/,g;
- $currdir =~ s,/+,/,g;
- $currdir =~ s,^/$vms_strip,,;
- if( $currdir eq '' ){
- next;
- }
- $match = $currdir;
- $match =~ s/([\+\(\)\[\]\*\?])/\\$1/g;
- #print ">>>match=$match currdir=$currdir\n";
- return( substr( $currdir, 1 ), 0, 0, 'd', 0 );
- }
-
- # MultiNet FTP
- # DSPD.MAN;1 9 1-APR-1991 12:55 [SG,ROSENBLUM] (RWED,RWED,RE,RE)
- # CMU/VMS-IP FTP
- # [VMSSERV.FILES]ALARM.DIR;1 1/3 5-MAR-1993 18:09
- local( $dir, $file, $vers, $size, $lsdate, $got );
- $got = 0;
- # For now ignore user and mode
- if( /^((\S+);(\d+))?\s+(\d+)\s+(\d+-\S+-\d+\s+\d+:\d+)/ ){
- ($file, $vers, $size, $lsdate) = ($2,$3,$4,$5);
- $got = 1;
- }
- elsif( /^(\[([^\]]+)\](\S+);(\d+))?\s+\d+\/\d+\s+(\d+-\S+-\d+\s+\d+:\d+)\s*$/ ){
- ($dir,$file,$vers,$lsdate) = ($2,$3,$4,$5);
- $got = 1;
- }
- # The sizes mean nothing under unix...
- $size = 0;
-
- if( $got ){
- local( $time ) = &main'lstime_to_time( $lsdate );
- local( $type ) = 'f';
- local( $mode ) = 0444;
-
- # Handle wrapped long filenames
- if( $filename ne '' ){
- $file = $filename;
- $vers = $version;
- if( $directory ){
- $dir = $directory;
- }
- }
- if( defined( $dir ) ){
- $dir =~ s/\./\//g;
- $file = $dir . '/' . $file;
- }
- $filename = '';
-
- if( $file =~ /^(.*)\.dir(;\d+)?$/ ){
- if( ! $vms_keep_dotdir ){
- $file = $1 . $2;
- }
- $type = 'd';
- $mode = 0555;
- }
-
- $lsparse'vers = $vers;
-
- #print "file=|$file| match=|$match| vms_strip=|$vms_strip|\n";
- $file =~ s,^,/,;
- $file =~ s,^/$match,,;
- if( ! defined( $dir ) ){
- $file = "$currdir/$file";
- }
- $file =~ s,^$vms_strip,,;
- $file =~ s,/+,/,g;
- #print "file=|$file|\n";
- return( substr( $file, 1 ), $size, $time, $type, $mode );
- }
- elsif( /^\[([^\]]+)\](\S+);(\d+)\s*$/ ){
- # If a filename is long then it is on a line by itself
- # with the details on the next line
- local( $d, $f, $v ) = ($1, $2, $3);
- $d =~ s/\./\//g;
- $directory = $d;
- $filename = $f;
- $version = $v;
- }
- elsif( /^(\S+);(\d+)\s*$/ ){
- # If a filename is long then it is on a line by itself
- # with the details on the next line
- $filename = $1;
- $version = $2;
- }
- else {
- printf( "Unmatched line: %s", $_ );
- }
- }
- return( '', 0, 0, 0, 0 );
- }
-
- # -----
- 1;
-